Importing the Data
library(readxl)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(tidyverse)
## ── Attaching packages ────────────────────────────────────────── tidyverse 1.2.1 ──
## ✔ ggplot2 3.1.1 ✔ readr 1.3.1
## ✔ tibble 2.0.1 ✔ purrr 0.2.5
## ✔ tidyr 0.8.2 ✔ stringr 1.3.1
## ✔ ggplot2 3.1.1 ✔ forcats 0.3.0
## ── Conflicts ───────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
library(ggplot2)
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
# Load the raw batting table and drop the two auxiliary player columns.
dat <- read_excel("ALL.xlsx") %>%
  select(-Player2, -Player3)
Create Variables we need
#1
# Cleaning the data: the pitch-count column is converted from character to
# number before any pitch-based rate is derived (unparseable entries become NA).
dat$NP <- as.numeric(dat$NP)
## Warning: NAs introduced by coercion
# Per-opportunity rate statistics, each rounded to three decimals.
dat <- dat %>%
  mutate(
    # Runs batted in per at bat
    RBIAB = round(RBI / AB, digits = 3),
    # Home runs per at bat
    HRAB = round(HR / AB, digits = 3),
    # Batting average on balls in play
    BABIP = round((H - HR) / (AB - SO - HR + SF), digits = 3),
    # Number of pitches per plate appearance
    NPPA = round(NP / PA, digits = 3),
    # Number of pitches per at bat
    NPAB = round(NP / AB, digits = 3),
    # Number of strikeouts per at bat
    SOAB = round(SO / AB, digits = 3)
  )
Visualize the Data
# Distribution of batting average across all player-seasons.
ggplot(dat, aes(x = AVG)) +
  geom_histogram(binwidth = .002, color = "Black", fill = "light blue") +
  labs(
    title = "Histogram of Batting Average",
    x = "Batting Average",
    y = "Count"
  )
# Distribution of on-base plus slugging across all player-seasons.
ggplot(dat, aes(x = OPS)) +
  geom_histogram(binwidth = .005, color = "Black", fill = "light green") +
  labs(
    title = "Histogram of OPS",
    x = "On-Base + Slugging Percentage",
    y = "Count"
  )
# Correlation between slugging percentage and RBI per at bat.
correlationMatrix3 <- select(dat, SLG, RBIAB)
slg_rbiab_cor <- cor(correlationMatrix3)
slg_rbiab_cor
## SLG RBIAB
## SLG 1.0000000 0.7968354
## RBIAB 0.7968354 1.0000000
# Take the annotation from the computed coefficient instead of typing it in by
# hand: the previous hard-coded label (.796 / .634) did not match the value
# printed above, which rounds to .797 / .635.
slg_rbiab_r <- slg_rbiab_cor["SLG", "RBIAB"]
ggplot(dat, aes(x = RBIAB, y = SLG)) +
  geom_point(color = "coral2") +
  geom_smooth(method = "lm", formula = y ~ x) +
  xlab("RBI per At bat") +
  ggtitle("Relationship between Slugging Percentage and RBI's per At Bat") +
  ylab("Slugging Percentage") +
  annotate(
    "text",
    x = .325, y = .55,
    label = sprintf(" R = %.3f \n R^2 = %.3f", slg_rbiab_r, slg_rbiab_r^2)
  )
# For comparison: raw RBI totals against slugging percentage.
correlationMatrix3 <- select(dat, RBI, SLG)
cor(correlationMatrix3)
## RBI SLG
## RBI 1.0000000 0.3106309
## SLG 0.3106309 1.0000000
Does seeing more pitches in an at bat increase the odds of hitting a home run? (Probably won't use this.)
# Home runs per at bat against pitches per plate appearance, with a linear fit.
ggplot(dat, aes(x = HRAB, y = NPPA)) +
  geom_point() +
  geom_smooth(method = "lm", formula = y ~ x) ## R_SQUARED
## Warning: Removed 22 rows containing non-finite values (stat_smooth).
## Warning: Removed 22 rows containing missing values (geom_point).
Which position has the best batting average?
# Batting average per player-season with the mean of its fielding position
# attached to every row (the faceted histogram below needs both columns).
# -9999 is treated as a missing-value sentinel and excluded from the mean.
# The result is deliberately left grouped by Pos, as before.
dat2 <- dat %>%
  select(Pos, AVG) %>%
  mutate(AVG = ifelse(AVG == -9999, NA, AVG)) %>%
  group_by(Pos) %>%
  mutate(MEANAVG = mean(AVG, na.rm = TRUE)) # TRUE, not the reassignable T
# One histogram per position; the vertical line marks that position's mean.
ggplot(dat2, aes(x = AVG)) +
  geom_histogram(binwidth = .002, color = "Black", fill = "goldenrod2") +
  xlab("Batting Average") +
  ggtitle("Histogram of Batting Average by Position") +
  ylab("Count") +
  facet_wrap(~Pos) +
  geom_vline(aes(xintercept = MEANAVG))
# Collapse to one row per position and round the mean to 3 digits.
dat3 <- dat2 %>%
  select(-AVG) %>%
  distinct() %>%
  mutate(MEANAVG = round(MEANAVG, digits = 3))
head(dat3, 11)
## # A tibble: 10 x 2
## # Groups: Pos [10]
## Pos MEANAVG
## <chr> <dbl>
## 1 1B 0.293
## 2 SS 0.284
## 3 2B 0.289
## 4 3B 0.285
## 5 CF 0.286
## 6 RF 0.284
## 7 LF 0.290
## 8 C 0.288
## 9 DH 0.275
## 10 OF 0.288
# WOW, this is surprising: the lowest batting average belongs to the DH (.275), followed by the shortstop and the right fielder (both .284)
Teams batting average?
# Batting average per player-season with the mean of its team attached to
# every row. -9999 is treated as a missing-value sentinel and excluded from
# the mean. The result is left grouped by Team, as before.
dat5 <- dat %>%
  select(Team, AVG) %>%
  mutate(AVG = ifelse(AVG == -9999, NA, AVG)) %>%
  group_by(Team) %>%
  mutate(MEANAVG = mean(AVG, na.rm = TRUE)) # TRUE, not the reassignable T
# One row per team, mean rounded to three decimals.
dat6 <- select(dat5, -AVG)
dat6 <- distinct(dat6)
dat6$MEANAVG <- round(dat6$MEANAVG, digits = 3)
head(dat6, 10)
## # A tibble: 10 x 2
## # Groups: Team [10]
## Team MEANAVG
## <chr> <dbl>
## 1 LAD 0.288
## 2 CWS 0.288
## 3 MIN 0.285
## 4 NYM 0.290
## 5 CLE 0.287
## 6 WSH 0.296
## 7 STL 0.297
## 8 TEX 0.279
## 9 TB 0.282
## 10 CHC 0.290
Following Albert Pujols through his career
# All rows whose Player field contains "Pujols,A".
Albert <- dat %>%
  filter(grepl("Pujols,A", Player))
# Rate statistics by year; the red vertical line marks 2010.
ggplot(Albert, aes(x = Year)) +
  geom_line(aes(y = AVG), color = "blue") +
  geom_line(aes(y = SLG), color = "red") +
  geom_line(aes(y = HRAB), color = "purple") +
  geom_line(aes(y = RBIAB), color = "Orange") +
  geom_vline(xintercept = 2010, color = "Red") +
  labs(
    title = "Albert Pujols's Career mapped by Year",
    x = "Year",
    y = "Averages"
  )
# Pujols's counting statistics by year (HR, SO, BB, RBI), with a marker and a
# note at 2010. Rendered as an interactive plotly chart.
a <- ggplot(Albert, aes(x = Year)) +
  geom_line(aes(y = HR), color = "blue") +
  geom_line(aes(y = SO), color = "red") +
  geom_line(aes(y = BB), color = "purple") +
  geom_line(aes(y = RBI), color = "Orange") +
  ylab("Totals By Category") +
  xlab("Year") +
  ggtitle("Albert Pujols's Career mapped by Year") +
  geom_vline(xintercept = 2010, color = "Black") +
  # Spelling of "Plummet" corrected in the displayed note.
  annotate(
    "text",
    x = 2012.5, y = 130,
    label = "Left ALL his fans \nand headed to the Angels. \nAll Stats Plummet"
  )
ggplotly(a)
# All rows whose Player field contains "Trout,M".
Trout <- dat[grepl("Trout,M", dat$Player), ]
# Drop the first matching row.
# NOTE(review): the reason is not visible here (another player matching the
# pattern, or a season to exclude?) -- confirm and document.
Trout <- Trout[-c(1), ]
# Trout's rate statistics by year.
b <- ggplot(Trout, aes(x = Year)) +
  geom_line(aes(y = AVG), color = "blue") +
  geom_line(aes(y = BABIP), color = "red") +
  geom_line(aes(y = HRAB), color = "purple") +
  geom_line(aes(y = RBIAB), color = "Orange") +
  ylab("Averages") +
  xlab("Year") +
  ggtitle("Mike Trout's Career mapped by Year")
# The plot object was previously built but never displayed; print it so the
# chart actually appears in the report.
b
# Trout's counting statistics by year (HR, SB, BB, RBI); the black vertical
# line marks 2017.
ggplot(Trout, aes(x = Year)) +
  geom_line(aes(y = HR), color = "blue") +
  geom_line(aes(y = SB), color = "red") +
  geom_line(aes(y = BB), color = "purple") +
  geom_line(aes(y = RBI), color = "Orange") +
  geom_vline(xintercept = 2017, color = "Black") +
  labs(
    title = "Mike Trout's Career mapped by Year",
    x = "Year",
    y = "Totals By Category"
  )
Do Homerun hitters have higher Strike Out percentages?
# Correlation between home runs per at bat and strikeouts per at bat.
correlationMatrix2 <- select(dat, HRAB, SOAB)
hrab_soab_cor <- cor(correlationMatrix2)
hrab_soab_cor
## HRAB SOAB
## HRAB 1.0000000 0.4033054
## SOAB 0.4033054 1.0000000
# Take the annotation from the computed coefficient: the previous hard-coded
# label (.402 / .162) did not match the value printed above (.403 / .163).
hrab_soab_r <- hrab_soab_cor["HRAB", "SOAB"]
ggplot(dat, aes(x = HRAB, y = SOAB)) +
  geom_smooth(method = "lm") +
  geom_point() +
  xlab("Home Runs Per at Bat") +
  ggtitle("Homerun Hitters and Strike Out Percentages") +
  ylab("Strike outs Per at Bat") +
  annotate(
    "text",
    x = .1225, y = .370,
    label = sprintf(" R = %.3f \n R^2 = %.3f", hrab_soab_r, hrab_soab_r^2)
  )
Graph rivalry teams by runs scored.
# Player-seasons for the two rival clubs (Team contains "BOS" / "NYY").
RedSox <- dat[grepl("BOS", dat$Team), ]
Yankees <- dat[grepl("NYY", dat$Team), ]
# Both subsets come from `dat`, so they share every column; stacking them is
# what is wanted here. The previous full_join() matched on all columns and
# emitted a long "Joining, by = ..." message to arrive at the same rows.
Rivals <- bind_rows(RedSox, Yankees)
# Team fill colours, applied in the order of the Team values (BOS, NYY).
jColors <- c("#BD3039", "#0C2340")
# NOTE(review): x = HR is continuous, so each facet shows a single box of AVG;
# confirm that is the intended comparison.
ggplot(Rivals, aes(x = HR, y = AVG, fill = Team)) +
  geom_boxplot() +
  facet_wrap(~Team) +
  xlab("Average Home Runs Hit in a Season") +
  ylab("Batting Average") +
  ggtitle("Rivalry Between the Boston Red Sox and New York Yankees") +
  scale_fill_manual(values = jColors)
Does Avg # of pitches in an at bat correlate with batting Avg? Does it correlate with HRs?
## cor() returns NA for these pairs because NPAB contains missing values; pass use = "complete.obs" to cor() to obtain R (and from it R^2)
# This one shows no evidence of a correlation, i.e. we fail to reject the null hypothesis
# Batting average against pitches per at bat, with a linear fit.
ggplot(dat, aes(x = AVG, y = NPAB)) +
  geom_smooth(method = "lm") +
  geom_point() +
  labs(
    title = "Relationship Between Average Number of Pitches in an at Bat to Batting Average",
    x = "Batting Average",
    y = "Average Number of Pitches During an at Bat"
  ) ## R_SQUARED
## Warning: Removed 22 rows containing non-finite values (stat_smooth).
## Warning: Removed 22 rows containing missing values (geom_point).
#This one has a slight positive correlation
# Home runs per at bat against pitches per at bat, with a linear fit.
# The title previously repeated the batting-average plot's title even though
# the x axis here is HRAB.
ggplot(dat, aes(x = HRAB, y = NPAB)) +
  geom_smooth(method = "lm") +
  geom_point() +
  xlab("Average Homeruns in an at Bat") +
  ggtitle("Relationship Between Average Number of Pitches in an at Bat to Home Runs per at Bat") +
  ylab("Average Number of Pitches During an at Bat") ## R_SQUARED
## Warning: Removed 22 rows containing non-finite values (stat_smooth).
## Warning: Removed 22 rows containing missing values (geom_point).
Distribution of BABIP with Buster Posey Highlighted
# All rows whose Player field contains "Posey,B", and his mean BABIP.
Posey <- dat[grepl("Posey,B", dat$Player), ]
# na.rm guards against a missing/undefined BABIP in any single season, which
# would otherwise turn the mean into NA and silently drop the marker line.
MeanBABIPposey <- mean(Posey$BABIP, na.rm = TRUE)
# League-wide BABIP distribution with Posey's mean marked in red.
# bins = 30 is the value ggplot2 was already defaulting to; stating it keeps
# the histogram (and the annotation height) unchanged while avoiding the
# "Pick better value with `binwidth`" message.
ggplot(dat, aes(x = BABIP)) +
  geom_histogram(bins = 30, color = "black", fill = "white") +
  geom_vline(xintercept = MeanBABIPposey, color = "Red") +
  annotate("text", x = .38, y = 135, label = "Buster Posey's Average BABIP")
Which teams hit the most homeruns?
# Home runs per at bat for each player-season with the mean of its team
# attached to every row. The -9999 check is kept from the original.
# NOTE(review): HRAB is a computed ratio, so it is unlikely ever to equal the
# sentinel exactly -- confirm whether HR/AB should be screened instead.
# The result is left grouped by Team, as before.
dat7 <- dat %>%
  select(Team, HRAB) %>%
  mutate(HRAB = ifelse(HRAB == -9999, NA, HRAB)) %>%
  group_by(Team) %>%
  mutate(MeanHR = mean(HRAB, na.rm = TRUE)) # TRUE, not the reassignable T
# One row per team, mean rounded to three decimals.
dat8 <- select(dat7, -HRAB)
dat8 <- distinct(dat8)
dat8$MeanHR <- round(dat8$MeanHR, digits = 3)
head(dat8, 10)
## # A tibble: 10 x 2
## # Groups: Team [10]
## Team MeanHR
## <chr> <dbl>
## 1 LAD 0.044
## 2 CWS 0.033
## 3 MIN 0.034
## 4 NYM 0.032
## 5 CLE 0.035
## 6 WSH 0.039
## 7 STL 0.044
## 8 TEX 0.037
## 9 TB 0.036
## 10 CHC 0.041
# Rank teams from highest to lowest mean home-run rate.
dat8 <- dat8 %>%
  arrange(desc(MeanHR))
head(dat8, 33)
## # A tibble: 33 x 2
## # Groups: Team [33]
## Team MeanHR
## <chr> <dbl>
## 1 TOR 0.049
## 2 BAL 0.045
## 3 MIL 0.045
## 4 LAD 0.044
## 5 STL 0.044
## 6 FLA 0.042
## 7 CHC 0.041
## 8 COL 0.04
## 9 SEA 0.04
## 10 WSH 0.039
## # … with 23 more rows
library("Hmisc")
## Loading required package: lattice
## Loading required package: survival
## Loading required package: Formula
##
## Attaching package: 'Hmisc'
## The following object is masked from 'package:plotly':
##
## subplot
## The following objects are masked from 'package:dplyr':
##
## src, summarize
## The following objects are masked from 'package:base':
##
## format.pval, units
library(corrplot)
## corrplot 0.84 loaded
library(plotly)
# Variables compared in the correlation analysis.
cor_vars <- select(
  dat,
  AVG, OBP, SLG, BABIP, OPS, HR, AO, SO, RBI, BB, XBH, NPPA, SB
)
# Correlation matrix over the rows with no missing values; this is the matrix
# that the corrplot/heatmap calls further down consume.
cordata <- cor(cor_vars, use = "complete.obs")
# rcorr() must be given the raw observations. It was previously handed the
# 13 x 13 correlation matrix itself, so it correlated correlations (n = 13)
# and its r and p-values were meaningless. The same complete rows are used so
# that its r matrix agrees with `cordata`.
# NOTE: any rendered rcorr output showing "n= 13" predates this fix and has to
# be regenerated.
cordata.rcorr <- rcorr(as.matrix(na.omit(cor_vars)))
cordata.rcorr
## AVG OBP SLG BABIP OPS HR AO SO RBI BB XBH
## AVG 1.00 0.83 0.67 0.78 0.75 -0.42 -0.74 -0.85 -0.57 -0.62 -0.61
## OBP 0.83 1.00 0.79 0.66 0.90 -0.26 -0.80 -0.73 -0.52 -0.26 -0.57
## SLG 0.67 0.79 1.00 0.33 0.98 0.22 -0.57 -0.49 -0.11 -0.18 -0.17
## BABIP 0.78 0.66 0.33 1.00 0.46 -0.71 -0.88 -0.66 -0.81 -0.68 -0.82
## OPS 0.75 0.90 0.98 0.46 1.00 0.07 -0.67 -0.59 -0.25 -0.21 -0.31
## HR -0.42 -0.26 0.22 -0.71 0.07 1.00 0.60 0.65 0.92 0.72 0.90
## AO -0.74 -0.80 -0.57 -0.88 -0.67 0.60 1.00 0.74 0.84 0.64 0.87
## SO -0.85 -0.73 -0.49 -0.66 -0.59 0.65 0.74 1.00 0.77 0.71 0.80
## RBI -0.57 -0.52 -0.11 -0.81 -0.25 0.92 0.84 0.77 1.00 0.75 0.99
## BB -0.62 -0.26 -0.18 -0.68 -0.21 0.72 0.64 0.71 0.75 1.00 0.75
## XBH -0.61 -0.57 -0.17 -0.82 -0.31 0.90 0.87 0.80 0.99 0.75 1.00
## NPPA -0.07 0.43 0.27 0.18 0.34 -0.06 -0.48 -0.03 -0.29 0.22 -0.32
## SB -0.31 -0.53 -0.75 -0.09 -0.71 -0.44 0.26 0.10 -0.19 -0.16 -0.11
## NPPA SB
## AVG -0.07 -0.31
## OBP 0.43 -0.53
## SLG 0.27 -0.75
## BABIP 0.18 -0.09
## OPS 0.34 -0.71
## HR -0.06 -0.44
## AO -0.48 0.26
## SO -0.03 0.10
## RBI -0.29 -0.19
## BB 0.22 -0.16
## XBH -0.32 -0.11
## NPPA 1.00 -0.40
## SB -0.40 1.00
##
## n= 13
##
##
## P
## AVG OBP SLG BABIP OPS HR AO SO RBI
## AVG 0.0005 0.0120 0.0016 0.0029 0.1485 0.0039 0.0002 0.0426
## OBP 0.0005 0.0012 0.0132 0.0000 0.3980 0.0009 0.0048 0.0717
## SLG 0.0120 0.0012 0.2727 0.0000 0.4689 0.0436 0.0906 0.7119
## BABIP 0.0016 0.0132 0.2727 0.1180 0.0064 0.0000 0.0145 0.0008
## OPS 0.0029 0.0000 0.0000 0.1180 0.8107 0.0119 0.0336 0.4074
## HR 0.1485 0.3980 0.4689 0.0064 0.8107 0.0289 0.0157 0.0000
## AO 0.0039 0.0009 0.0436 0.0000 0.0119 0.0289 0.0039 0.0003
## SO 0.0002 0.0048 0.0906 0.0145 0.0336 0.0157 0.0039 0.0021
## RBI 0.0426 0.0717 0.7119 0.0008 0.4074 0.0000 0.0003 0.0021
## BB 0.0237 0.3993 0.5617 0.0101 0.4875 0.0057 0.0195 0.0060 0.0029
## XBH 0.0278 0.0425 0.5675 0.0006 0.2974 0.0000 0.0000 0.0011 0.0000
## NPPA 0.8092 0.1385 0.3713 0.5596 0.2596 0.8516 0.1005 0.9131 0.3401
## SB 0.2979 0.0597 0.0032 0.7747 0.0060 0.1313 0.3936 0.7466 0.5304
## BB XBH NPPA SB
## AVG 0.0237 0.0278 0.8092 0.2979
## OBP 0.3993 0.0425 0.1385 0.0597
## SLG 0.5617 0.5675 0.3713 0.0032
## BABIP 0.0101 0.0006 0.5596 0.7747
## OPS 0.4875 0.2974 0.2596 0.0060
## HR 0.0057 0.0000 0.8516 0.1313
## AO 0.0195 0.0000 0.1005 0.3936
## SO 0.0060 0.0011 0.9131 0.7466
## RBI 0.0029 0.0000 0.3401 0.5304
## BB 0.0030 0.4658 0.5974
## XBH 0.0030 0.2921 0.7212
## NPPA 0.4658 0.2921 0.1775
## SB 0.5974 0.7212 0.1775
# 20-step blue -> yellow -> red colour ramp shared by both correlation plots.
palette <- colorRampPalette(colors = c("blue", "yellow", "red"))(n = 20)
# Dot graph of the correlation matrix.
corrplot(cordata, col = palette)
# Heatmap of the same matrix.
heatmap(x = cordata, col = palette, symm = TRUE)
#go back and change color scheme
Choropleth map
# Stadium table used by the map below.
Stadiums <- read_excel("StadiumLocations.xlsx")
# View() opens a data viewer window, which is only meaningful (and only
# reliably available) in an interactive session; skip it when the script is
# rendered or run in batch.
if (interactive()) {
  View(Stadiums)
}
# Polygon outlines of the US states for the base map.
states <- map_data("state")
##
## Attaching package: 'maps'
## The following object is masked from 'package:purrr':
##
## map
# State outlines that every stadium layer is drawn on top of.
base <- ggplot(states, aes(x = long, y = lat)) +
  geom_polygon(aes(group = group)) +
  labs(title = "MLB Teams", x = " ", y = "_")
base
#Change this to look better, put labels, put colors,
#In the excel file, add averages for each team, make sure team names are able so we can display them
# Create one data frame per club, named after its abbreviation (LAA, SF, ...),
# holding the rows of `Stadiums` whose ABB column equals that abbreviation.
team_abbreviations <- c(
  "LAA", "SF", "COL", "STL", "ARI", "LAD", "NYM", "PHI", "DET", "BOS",
  "TEX", "CIN", "KC", "MIA", "MIL", "HOU", "WSH", "OAK", "BAL", "SD",
  "PIT", "CLE", "TOR", "SEA", "MIN", "TB", "ATL", "CWS", "CHC", "NYY"
)
for (team_code in team_abbreviations) {
  assign(team_code, subset(Stadiums, Stadiums$ABB == team_code))
}
# Ring (`outer`, size 7) and dot (`inner`, size 5) colours for every club,
# listed in drawing order: later clubs are painted over earlier ones where
# stadiums overlap. The first 14 rows make up `half1`, the remaining 16
# complete `whole`.
team_colours <- data.frame(
  ABB = c(
    "LAA", "SF", "COL", "STL", "ARI", "LAD", "NYM",
    "PHI", "DET", "BOS", "TEX", "CIN", "KC", "MIA",
    "MIL", "HOU", "WSH", "OAK", "BAL", "SD", "PIT", "CLE",
    "TOR", "SEA", "MIN", "TB", "ATL", "CWS", "CHC", "NYY"
  ),
  outer = c(
    "#003263", "#FD5A1E", "#C4CED4", "#0C2340", "#E3D4AD", "#EF3E42", "#FF5910",
    "#002D72", "#FA4616", "#0C2340", "#C0111F", "white", "#BD9B60", "#FF6600",
    "#B6922E", "#EB6E1F", "#14225A", "#EFB21E", "#000000", "#A2AAAD", "#FDB827", "#E31937",
    "#1D2D5C", "#005C5C", "#D31145", "#8FBCE6", "#13274F", "#C4CED4", "#CC3433", "#0C2340"
  ),
  inner = c(
    "#BA0021", "#27251F", "#33006F", "#C41E3A", "#A71930", "#005A9C", "#002D72",
    "#E81828", "#0C2340", "#BD3039", "#003278", "#C6011F", "#004687", "#0077C8",
    "#0A2351", "#002D62", "#AB0003", "#003831", "#DF4601", "#002D62", "#27251F", "#0C2340",
    "#134A8E", "#0C2C56", "#002B5C", "#092C5C", "#CE1141", "#27251F", "#0E3386", "red"
  ),
  stringsAsFactors = FALSE
)

# Build the ring-then-dot point layers for each club in `colours`, reading the
# club's coordinates from `Stadiums` (rows whose ABB matches).
# Returns a flat list of layers that can be added to a ggplot with `+`.
team_point_layers <- function(colours) {
  per_team <- lapply(seq_len(nrow(colours)), function(i) {
    team_rows <- Stadiums[which(Stadiums$ABB == colours$ABB[i]), ]
    list(
      geom_point(
        aes(x = Longitude, y = Latitude),
        data = team_rows, colour = colours$outer[i], size = 7
      ),
      geom_point(
        aes(x = Longitude, y = Latitude),
        data = team_rows, colour = colours$inner[i], size = 5
      )
    )
  })
  do.call(c, per_team)
}

# The KC ring/dot pair used to be added twice; it is now drawn once, which
# renders identically.
half1 <- base + team_point_layers(head(team_colours, 14))
whole <- half1 + team_point_layers(tail(team_colours, 16))
library(ggrepel)
#change team names to just team, not with city?
#add batting avgs
#change colors and size of text
# Stadiums whose longitude lies between -130 and 0 (the range the map covers).
stadium_labels <- Stadiums %>%
  filter(Longitude < 0 & Longitude > -130)
# Team abbreviation above each stadium, batting average below it.
# The labels are mapped inside aes() so they come from the same filtered rows
# as the coordinates. Previously they were taken from the unfiltered
# `Stadiums` columns, which errors or mislabels points as soon as the filter
# removes a row.
whole +
  geom_label(
    aes(x = Longitude, y = Latitude, label = `ABR.`),
    data = stadium_labels,
    color = "black", size = 3, nudge_y = 1,
    label.padding = unit(.1, "lines")
  ) +
  geom_text(
    aes(x = Longitude, y = Latitude, label = AVG),
    data = stadium_labels,
    color = "red", size = 2, nudge_y = -1.5
  )
#ggplot(states, aes(x=long, y=lat))+geom_polygon(aes(group=group))+geom_point(aes(x=Longitude, y=Latitude), data=Stadiums %>% filter(Longitude <0 & Longitude> -130), size=2, color="red") +geom